#hide !pip install nbdev from nbdev import * ! nbdev_upgrade
This chapter was brought to you through a collaboration between Brian Kelly, Michael Vandi, Logan Shertz, and Charles Karpati.
Dataset: Scooter data:
File paths will vary.
# cd ./drive/'My Drive'/BNIA/responsive_records/Routes# cd ../content/drive/My Drive/DATA/scooter/content/drive/My Drive/DATA/scooter # Michael's Directory # cd ./drive/'My Drive'/BNIA/'Scooter Use Data'/BNIAcd ./Routes/content/drive/My Drive/DATA/scooter/Routes # hide_output ! ls leftRouts.csv 'Routing November 2019.geojson' rightRouts.csv 'Routing October 2019.geojson' 'Routing August 2019.geojson' 'Routing September 2019.geojson' 'Routing December 2019.geojson' # hide_output !cd ../ && ls boundsdf.csv rightRouts.csv 'Trip origins-destinations by month' Deployment Routes leftRouts.csv scooterdf.csv| id | name | color | radius | value | geometry | |
|---|---|---|---|---|---|---|
| 0 | 1 | Block 245102502063018 | #fff | 4.999286 | NaN | POLYGON ((-76.66168 39.26342, -76.66136 39.26373, -76.66096 39.26343, -76.66041 39.26306, -76.66053 39.26296, -76.66070 39.26281, -76.66077 39.26284, -76.66100 39.26296, -76.66117 39.26306, -76.66129 39.26315, -76.66141 39.26322, -76.66168 39.26342)) |
| 1 | 2 | Block 245102006001022 | #fff | 4.999286 | NaN | POLYGON ((-76.66319 39.28427, -76.66306 39.28436, -76.66301 39.28439, -76.66293 39.28439, -76.66271 39.28418, -76.66303 39.28402, -76.66317 39.28417, -76.66320 39.28423, -76.66319 39.28427)) |
| 2 | 3 | Block 245102804033017 | #fff | 4.999286 | NaN | POLYGON ((-76.71122 39.27927, -76.71121 39.27975, -76.71121 39.27990, -76.71119 39.28076, -76.71045 39.27932, -76.71045 39.27920, -76.71056 39.27907, -76.71123 39.27880, -76.71122 39.27927)) |
| 3 | 4 | Block 245102716006003 | #fff | 4.999286 | NaN | POLYGON ((-76.66960 39.34512, -76.66905 39.34526, -76.66886 39.34531, -76.66848 39.34539, -76.66835 39.34502, -76.66867 39.34498, -76.66924 39.34484, -76.66946 39.34475, -76.66960 39.34512)) |
| 4 | 5 | Block 245102709023013 | #fff | 4.999286 | NaN | POLYGON ((-76.58976 39.35401, -76.58972 39.35415, -76.58966 39.35435, -76.58964 39.35447, -76.58960 39.35469, -76.58940 39.35467, -76.58925 39.35465, -76.58912 39.35465, -76.58902 39.35464, -76.58898 39.35464, -76.58906 39.35425, -76.58917 39.35385, -76.58925 39.35385, -76.58939 39.35386, -76.58979 39.35393, -76.58976 39.35401)) |
| CSA2010 | NumberMissingCount | |
|---|---|---|
| 0 | Allendale/Irvington/S. Hilton | 290 |
| 1 | Beechfield/Ten Hills/West Hills | 180 |
| 2 | Belair-Edison | 250 |
| 3 | Brooklyn/Curtis Bay/Hawkins Point | 580 |
| 4 | Canton | 0 |
Ensure the merge keys use consistent datatypes.
# --- Notebook-export residue: several cells and a markdown heading are fused
# --- onto each physical line below; code tokens are kept byte-identical.
# Cast both join keys to str so GEOID10 and nameChange2 compare as like types.
gdf['GEOID10'] = gdf['GEOID10'].astype(str) scooterdf['nameChange2'] = scooterdf['nameChange2'].astype(str)Perform the merge
# Inner merge of census-block geometries onto scooter records on
# GEOID10 == nameChange2, dropping the now-redundant key/name columns.
# The remainder of the line is a run of separate exploration cells:
# commented-out CSV snapshots, an `ls`, per-CSA sums/means, missing-value
# counts built from isna()/notnull()/isnull() grouped by CSA, and a
# pandas-docs URL left behind by the export.
# NOTE(review): groupby('CSA')['value'].sum() skips NaN; the fillna(-1)
# variant later in the line changes the totals — confirm which total the
# downstream indicators should use.
scooterdfClean = gdf.merge(scooterdf, left_on='GEOID10', right_on='nameChange2').drop(['name', 'nameChange1', 'nameChange2'], axis=1)scooterdfClean.head()# scooterdf.to_csv('./scooterdf.csv', index=False) # gdf.drop(columns='geometry').to_csv('./boundsdf.csv', index=False)lsscooterdfClean.groupby('CSA')['value'].sum()scooterdfClean.value.isna().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='notApplicable')scooterdfClean.value.notnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NotMissingCount')scooterdfClean.value.isnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NumberMissingCount')scooterdfClean.fillna(-1).groupby('CSA')['value'].sum()scooterdfClean.groupby('CSA')['value'].mean()scooterdfClean.groupby('CSA')['value'].sum()scooterdfClean.CSA.value_counts()https://pandas.pydata.org/docs/getting_started/intro_tutorials/06_calculate_statistics.html| id | color | streetname | trip_count_sum | trip_count_average | trip_count_percent | geometry | |
|---|---|---|---|---|---|---|---|
| 0 | 150197772 | rgb(218, 231, 241) | Jefferson Street | 24 | 0.774194 | 0.0% | LINESTRING (-76.58576 39.29660, -76.58550 39.29661) |
| 1 | 150155955 | rgb(164, 190, 219) | 206 | 6.645161 | 0.3% | LINESTRING (-76.61320 39.28136, -76.61318 39.28115) | |
| 2 | 150191673 | rgb(204, 221, 236) | Harford Avenue | 49 | 1.580645 | 0.1% | LINESTRING (-76.60169 39.30535, -76.60163 39.30544, -76.60156 39.30555) |
| 3 | 150184657 | rgb(229, 239, 246) | 11 | 0.354839 | 0.0% | LINESTRING (-76.61493 39.29294, -76.61491 39.29258, -76.61490 39.29230) | |
| 4 | 150188407 | rgb(169, 194, 221) | 178 | 5.741935 | 0.2% | LINESTRING (-76.61662 39.29053, -76.61661 39.29046) |
Clean the gdf of empties.
# --- Notebook-export residue: cells and markdown headings fused per line;
# --- code tokens kept byte-identical.
# NOTE(review): ~gdf.isna() is a DataFrame-shaped mask — indexing with it
# masks individual cells to NaN instead of dropping rows; the intent is
# probably gdf.dropna(subset=['geometry']). ~gdf.is_empty is a row-wise
# Series and does filter out empty geometries. Confirm before relying on
# the first filter.
gdf = gdf[~gdf.isna()] gdf = gdf[~gdf.is_empty]Now get the extremities; this will take a minute.
# Unpack one coordinate 4-tuple per geometry.
# NOTE(review): assumes the project-level split() yields
# (lefty, leftx, righty, rightx) per geometry — confirm against its definition.
# hide_output gdf['lefty'], gdf['leftx'],gdf['righty'], gdf['rightx'] = zip(*gdf["geometry"].map(split))Split the gdf into a left and right dataset
# Duplicate the frame and keep only the columns each side needs; the right
# copy also sheds the descriptive/trip columns.
gdf_left = gdf.copy() gdf_left = gdf_left.drop(columns = ['geometry','rightx', 'righty']) gdf_right= gdf.copy() gdf_right = gdf_right.drop(columns = ['geometry','leftx', 'lefty', 'streetname', 'trip_count_sum', 'trip_count_average', 'trip_count_percent', 'color' ])The coordinate variables of object type will cause problems.
# Captured dtypes output: rightx is still object at this point.
gdf_right.dtypesid int64 righty float64 rightx object dtype: objectLet's go ahead and coerce a correction.
# Coerce all four coordinate columns to numeric; unparseable values become NaN.
gdf_right['righty']=pd.to_numeric(gdf_right['righty'], errors='coerce') gdf_left['lefty']=pd.to_numeric(gdf_left['lefty'], errors='coerce') gdf_right['rightx']=pd.to_numeric(gdf_right['rightx'], errors='coerce') gdf_left['leftx']=pd.to_numeric(gdf_left['leftx'], errors='coerce')Now we can view the results
# Captured dtypes output confirming rightx is now float64.
gdf_right.dtypesid int64 righty float64 rightx float64 dtype: objectSave these csv's because it took a minute to get to where we are now.
# Snapshot both halves. NOTE(review): no index=False here (unlike the earlier
# scooterdf snapshot), so pandas writes the index as an extra CSV column.
gdf_right.to_csv('rightRouts.csv') gdf_left.to_csv('leftRouts.csv')Convert the datasets to geodataframes for further exploration!
# Build point GeoDataFrames via the project helper readInGeometryData.
# NOTE(review): lat='leftx'/lng='lefty' — in the printed head(), rightx holds
# latitude-like values (39.x) and righty longitude-like (-76.x), so the x/y
# names are swapped relative to convention; usage looks internally consistent
# but verify. Also in_crs/out_crs=4268 is an unusual geographic CRS for
# lat/lng data — EPSG:4326 or 4269 expected; likely a typo, confirm.
# We could create a geodataframe like this #temp_gdf = gpd.GeoDataFrame( gdf_right, geometry=gpd.points_from_xy(gdf_right.rightx, gdf_right.righty) ) #temp_gdf.head() # Alternately this could work.. unfinished.. but wkt.loads can make a Point from text # gdf_right['strCol']=gdf_right['rightx'].astype(str) # gdf_right['geometry'] = gdf_right['strCol'].apply(wkt.loads)# hide_output left_csaMap = readInGeometryData(url=gdf_left, porg='p', geom= False, lat= 'leftx', lng= 'lefty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)# hide_output right_csaMap = readInGeometryData(url=gdf_right, porg='p', geom= False, lat= 'rightx', lng= 'righty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)right_csaMap.head()| id | righty | rightx | geometry | |
|---|---|---|---|---|
| 0 | 150197772 | -76.585503 | 39.296607 | POINT (-76.58550 39.29661) |
| 1 | 150155955 | -76.613183 | 39.281147 | POINT (-76.61318 39.28115) |
| 2 | 150191673 | -76.601555 | 39.305545 | POINT (-76.60156 39.30555) |
| 3 | 150184657 | -76.614899 | 39.292301 | POINT (-76.61490 39.29230) |
| 4 | 150188407 | -76.616608 | 39.290458 | POINT (-76.61661 39.29046) |
| pointsinpolygon | |
|---|---|
| 0 | 18 |
| 1 | 0 |
| 2 | 67 |
| 3 | 0 |
| 4 | 1536 |
| 5 | 18 |
| 6 | 32 |
| 7 | 8 |
| 8 | 1 |
| 9 | 65 |
| 10 | 0 |
| 11 | 0 |
| 12 | 13 |
| 13 | 1764 |
| 14 | 0 |
| 15 | 760 |
| 16 | 0 |
| 17 | 85 |
| 18 | 1253 |
| 19 | 2 |
| pointsinpolygon | |
|---|---|
| 0 | 18 |
| 1 | 0 |
| 2 | 67 |
| 3 | 0 |
| 4 | 1536 |
| 5 | 18 |
| 6 | 32 |
| 7 | 8 |
| 8 | 1 |
| 9 | 65 |
| 10 | 0 |
| 11 | 0 |
| 12 | 13 |
| 13 | 1764 |
| 14 | 0 |
| 15 | 760 |
| 16 | 0 |
| 17 | 85 |
| 18 | 1253 |
| 19 | 2 |
This chapter was brought to you through a collaboration between Brian Kelly, Michael Vandi, Logan Shertz, and Charles Karpati.
Dataset: Scooter data:
File paths will vary.
# cd ./drive/'My Drive'/BNIA/responsive_records/Routes# cd ../content/drive/My Drive/DATA/scooter/content/drive/My Drive/DATA/scooter # Michael's Directory # cd ./drive/'My Drive'/BNIA/'Scooter Use Data'/BNIAcd ./Routes/content/drive/My Drive/DATA/scooter/Routes # hide_output ! ls leftRouts.csv 'Routing November 2019.geojson' rightRouts.csv 'Routing October 2019.geojson' 'Routing August 2019.geojson' 'Routing September 2019.geojson' 'Routing December 2019.geojson' # hide_output !cd ../ && ls boundsdf.csv rightRouts.csv 'Trip origins-destinations by month' Deployment Routes leftRouts.csv scooterdf.csv| id | name | color | radius | value | geometry | |
|---|---|---|---|---|---|---|
| 0 | 1 | Block 245102502063018 | #fff | 4.999286 | NaN | POLYGON ((-76.66168 39.26342, -76.66136 39.26373, -76.66096 39.26343, -76.66041 39.26306, -76.66053 39.26296, -76.66070 39.26281, -76.66077 39.26284, -76.66100 39.26296, -76.66117 39.26306, -76.66129 39.26315, -76.66141 39.26322, -76.66168 39.26342)) |
| 1 | 2 | Block 245102006001022 | #fff | 4.999286 | NaN | POLYGON ((-76.66319 39.28427, -76.66306 39.28436, -76.66301 39.28439, -76.66293 39.28439, -76.66271 39.28418, -76.66303 39.28402, -76.66317 39.28417, -76.66320 39.28423, -76.66319 39.28427)) |
| 2 | 3 | Block 245102804033017 | #fff | 4.999286 | NaN | POLYGON ((-76.71122 39.27927, -76.71121 39.27975, -76.71121 39.27990, -76.71119 39.28076, -76.71045 39.27932, -76.71045 39.27920, -76.71056 39.27907, -76.71123 39.27880, -76.71122 39.27927)) |
| 3 | 4 | Block 245102716006003 | #fff | 4.999286 | NaN | POLYGON ((-76.66960 39.34512, -76.66905 39.34526, -76.66886 39.34531, -76.66848 39.34539, -76.66835 39.34502, -76.66867 39.34498, -76.66924 39.34484, -76.66946 39.34475, -76.66960 39.34512)) |
| 4 | 5 | Block 245102709023013 | #fff | 4.999286 | NaN | POLYGON ((-76.58976 39.35401, -76.58972 39.35415, -76.58966 39.35435, -76.58964 39.35447, -76.58960 39.35469, -76.58940 39.35467, -76.58925 39.35465, -76.58912 39.35465, -76.58902 39.35464, -76.58898 39.35464, -76.58906 39.35425, -76.58917 39.35385, -76.58925 39.35385, -76.58939 39.35386, -76.58979 39.35393, -76.58976 39.35401)) |
| CSA2010 | NumberMissingCount | |
|---|---|---|
| 0 | Allendale/Irvington/S. Hilton | 290 |
| 1 | Beechfield/Ten Hills/West Hills | 180 |
| 2 | Belair-Edison | 250 |
| 3 | Brooklyn/Curtis Bay/Hawkins Point | 580 |
| 4 | Canton | 0 |
Ensure the merge keys use consistent datatypes.
# --- Notebook-export residue: several cells and a markdown heading are fused
# --- onto each physical line below; code tokens are kept byte-identical.
# Cast both join keys to str so GEOID10 and nameChange2 compare as like types.
gdf['GEOID10'] = gdf['GEOID10'].astype(str) scooterdf['nameChange2'] = scooterdf['nameChange2'].astype(str)Perform the merge
# Inner merge of census-block geometries onto scooter records on
# GEOID10 == nameChange2, dropping the now-redundant key/name columns.
# The remainder of the line is a run of separate exploration cells:
# commented-out CSV snapshots, an `ls`, per-CSA sums/means, missing-value
# counts built from isna()/notnull()/isnull() grouped by CSA, and a
# pandas-docs URL left behind by the export.
# NOTE(review): groupby('CSA')['value'].sum() skips NaN; the fillna(-1)
# variant later in the line changes the totals — confirm which total the
# downstream indicators should use.
scooterdfClean = gdf.merge(scooterdf, left_on='GEOID10', right_on='nameChange2').drop(['name', 'nameChange1', 'nameChange2'], axis=1)scooterdfClean.head()# scooterdf.to_csv('./scooterdf.csv', index=False) # gdf.drop(columns='geometry').to_csv('./boundsdf.csv', index=False)lsscooterdfClean.groupby('CSA')['value'].sum()scooterdfClean.value.isna().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='notApplicable')scooterdfClean.value.notnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NotMissingCount')scooterdfClean.value.isnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NumberMissingCount')scooterdfClean.fillna(-1).groupby('CSA')['value'].sum()scooterdfClean.groupby('CSA')['value'].mean()scooterdfClean.groupby('CSA')['value'].sum()scooterdfClean.CSA.value_counts()https://pandas.pydata.org/docs/getting_started/intro_tutorials/06_calculate_statistics.html| id | color | streetname | trip_count_sum | trip_count_average | trip_count_percent | geometry | |
|---|---|---|---|---|---|---|---|
| 0 | 150197772 | rgb(218, 231, 241) | Jefferson Street | 24 | 0.774194 | 0.0% | LINESTRING (-76.58576 39.29660, -76.58550 39.29661) |
| 1 | 150155955 | rgb(164, 190, 219) | 206 | 6.645161 | 0.3% | LINESTRING (-76.61320 39.28136, -76.61318 39.28115) | |
| 2 | 150191673 | rgb(204, 221, 236) | Harford Avenue | 49 | 1.580645 | 0.1% | LINESTRING (-76.60169 39.30535, -76.60163 39.30544, -76.60156 39.30555) |
| 3 | 150184657 | rgb(229, 239, 246) | 11 | 0.354839 | 0.0% | LINESTRING (-76.61493 39.29294, -76.61491 39.29258, -76.61490 39.29230) | |
| 4 | 150188407 | rgb(169, 194, 221) | 178 | 5.741935 | 0.2% | LINESTRING (-76.61662 39.29053, -76.61661 39.29046) |
Clean the gdf of empties.
# --- Notebook-export residue: cells and markdown headings fused per line;
# --- code tokens kept byte-identical.
# NOTE(review): ~gdf.isna() is a DataFrame-shaped mask — indexing with it
# masks individual cells to NaN instead of dropping rows; the intent is
# probably gdf.dropna(subset=['geometry']). ~gdf.is_empty is a row-wise
# Series and does filter out empty geometries. Confirm before relying on
# the first filter.
gdf = gdf[~gdf.isna()] gdf = gdf[~gdf.is_empty]Now get the extremities; this will take a minute.
# Unpack one coordinate 4-tuple per geometry.
# NOTE(review): assumes the project-level split() yields
# (lefty, leftx, righty, rightx) per geometry — confirm against its definition.
# hide_output gdf['lefty'], gdf['leftx'],gdf['righty'], gdf['rightx'] = zip(*gdf["geometry"].map(split))Split the gdf into a left and right dataset
# Duplicate the frame and keep only the columns each side needs; the right
# copy also sheds the descriptive/trip columns.
gdf_left = gdf.copy() gdf_left = gdf_left.drop(columns = ['geometry','rightx', 'righty']) gdf_right= gdf.copy() gdf_right = gdf_right.drop(columns = ['geometry','leftx', 'lefty', 'streetname', 'trip_count_sum', 'trip_count_average', 'trip_count_percent', 'color' ])The coordinate variables of object type will cause problems.
# Captured dtypes output: rightx is still object at this point.
gdf_right.dtypesid int64 righty float64 rightx object dtype: objectLet's go ahead and coerce a correction.
# Coerce all four coordinate columns to numeric; unparseable values become NaN.
gdf_right['righty']=pd.to_numeric(gdf_right['righty'], errors='coerce') gdf_left['lefty']=pd.to_numeric(gdf_left['lefty'], errors='coerce') gdf_right['rightx']=pd.to_numeric(gdf_right['rightx'], errors='coerce') gdf_left['leftx']=pd.to_numeric(gdf_left['leftx'], errors='coerce')Now we can view the results
# Captured dtypes output confirming rightx is now float64.
gdf_right.dtypesid int64 righty float64 rightx float64 dtype: objectSave these csv's because it took a minute to get to where we are now.
# Snapshot both halves. NOTE(review): no index=False here (unlike the earlier
# scooterdf snapshot), so pandas writes the index as an extra CSV column.
gdf_right.to_csv('rightRouts.csv') gdf_left.to_csv('leftRouts.csv')Convert the datasets to geodataframes for further exploration!
# Build point GeoDataFrames via the project helper readInGeometryData.
# NOTE(review): lat='leftx'/lng='lefty' — in the printed head(), rightx holds
# latitude-like values (39.x) and righty longitude-like (-76.x), so the x/y
# names are swapped relative to convention; usage looks internally consistent
# but verify. Also in_crs/out_crs=4268 is an unusual geographic CRS for
# lat/lng data — EPSG:4326 or 4269 expected; likely a typo, confirm.
# We could create a geodataframe like this #temp_gdf = gpd.GeoDataFrame( gdf_right, geometry=gpd.points_from_xy(gdf_right.rightx, gdf_right.righty) ) #temp_gdf.head() # Alternately this could work.. unfinished.. but wkt.loads can make a Point from text # gdf_right['strCol']=gdf_right['rightx'].astype(str) # gdf_right['geometry'] = gdf_right['strCol'].apply(wkt.loads)# hide_output left_csaMap = readInGeometryData(url=gdf_left, porg='p', geom= False, lat= 'leftx', lng= 'lefty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)# hide_output right_csaMap = readInGeometryData(url=gdf_right, porg='p', geom= False, lat= 'rightx', lng= 'righty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)right_csaMap.head()| id | righty | rightx | geometry | |
|---|---|---|---|---|
| 0 | 150197772 | -76.585503 | 39.296607 | POINT (-76.58550 39.29661) |
| 1 | 150155955 | -76.613183 | 39.281147 | POINT (-76.61318 39.28115) |
| 2 | 150191673 | -76.601555 | 39.305545 | POINT (-76.60156 39.30555) |
| 3 | 150184657 | -76.614899 | 39.292301 | POINT (-76.61490 39.29230) |
| 4 | 150188407 | -76.616608 | 39.290458 | POINT (-76.61661 39.29046) |
| pointsinpolygon | |
|---|---|
| 0 | 18 |
| 1 | 0 |
| 2 | 67 |
| 3 | 0 |
| 4 | 1536 |
| 5 | 18 |
| 6 | 32 |
| 7 | 8 |
| 8 | 1 |
| 9 | 65 |
| 10 | 0 |
| 11 | 0 |
| 12 | 13 |
| 13 | 1764 |
| 14 | 0 |
| 15 | 760 |
| 16 | 0 |
| 17 | 85 |
| 18 | 1253 |
| 19 | 2 |
| pointsinpolygon | |
|---|---|
| 0 | 18 |
| 1 | 0 |
| 2 | 67 |
| 3 | 0 |
| 4 | 1536 |
| 5 | 18 |
| 6 | 32 |
| 7 | 8 |
| 8 | 1 |
| 9 | 65 |
| 10 | 0 |
| 11 | 0 |
| 12 | 13 |
| 13 | 1764 |
| 14 | 0 |
| 15 | 760 |
| 16 | 0 |
| 17 | 85 |
| 18 | 1253 |
| 19 | 2 |
This chapter was brought to you through a collaboration between Brian Kelly, Michael Vandi, Logan Shertz, and Charles Karpati.
Dataset: Scooter data:
File paths will vary.
# cd ./drive/'My Drive'/BNIA/responsive_records/Routes# cd ../content/drive/My Drive/DATA/scooter/content/drive/My Drive/DATA/scooter # Michael's Directory # cd ./drive/'My Drive'/BNIA/'Scooter Use Data'/BNIAcd ./Routes/content/drive/My Drive/DATA/scooter/Routes # hide_output ! ls leftRouts.csv 'Routing November 2019.geojson' rightRouts.csv 'Routing October 2019.geojson' 'Routing August 2019.geojson' 'Routing September 2019.geojson' 'Routing December 2019.geojson' # hide_output !cd ../ && ls boundsdf.csv rightRouts.csv 'Trip origins-destinations by month' Deployment Routes leftRouts.csv scooterdf.csv| id | name | color | radius | value | geometry | |
|---|---|---|---|---|---|---|
| 0 | 1 | Block 245102502063018 | #fff | 4.999286 | NaN | POLYGON ((-76.66168 39.26342, -76.66136 39.26373, -76.66096 39.26343, -76.66041 39.26306, -76.66053 39.26296, -76.66070 39.26281, -76.66077 39.26284, -76.66100 39.26296, -76.66117 39.26306, -76.66129 39.26315, -76.66141 39.26322, -76.66168 39.26342)) |
| 1 | 2 | Block 245102006001022 | #fff | 4.999286 | NaN | POLYGON ((-76.66319 39.28427, -76.66306 39.28436, -76.66301 39.28439, -76.66293 39.28439, -76.66271 39.28418, -76.66303 39.28402, -76.66317 39.28417, -76.66320 39.28423, -76.66319 39.28427)) |
| 2 | 3 | Block 245102804033017 | #fff | 4.999286 | NaN | POLYGON ((-76.71122 39.27927, -76.71121 39.27975, -76.71121 39.27990, -76.71119 39.28076, -76.71045 39.27932, -76.71045 39.27920, -76.71056 39.27907, -76.71123 39.27880, -76.71122 39.27927)) |
| 3 | 4 | Block 245102716006003 | #fff | 4.999286 | NaN | POLYGON ((-76.66960 39.34512, -76.66905 39.34526, -76.66886 39.34531, -76.66848 39.34539, -76.66835 39.34502, -76.66867 39.34498, -76.66924 39.34484, -76.66946 39.34475, -76.66960 39.34512)) |
| 4 | 5 | Block 245102709023013 | #fff | 4.999286 | NaN | POLYGON ((-76.58976 39.35401, -76.58972 39.35415, -76.58966 39.35435, -76.58964 39.35447, -76.58960 39.35469, -76.58940 39.35467, -76.58925 39.35465, -76.58912 39.35465, -76.58902 39.35464, -76.58898 39.35464, -76.58906 39.35425, -76.58917 39.35385, -76.58925 39.35385, -76.58939 39.35386, -76.58979 39.35393, -76.58976 39.35401)) |
| CSA2010 | NumberMissingCount | |
|---|---|---|
| 0 | Allendale/Irvington/S. Hilton | 290 |
| 1 | Beechfield/Ten Hills/West Hills | 180 |
| 2 | Belair-Edison | 250 |
| 3 | Brooklyn/Curtis Bay/Hawkins Point | 580 |
| 4 | Canton | 0 |
Ensure the merge keys use consistent datatypes.
# --- Notebook-export residue: several cells and a markdown heading are fused
# --- onto each physical line below; code tokens are kept byte-identical.
# Cast both join keys to str so GEOID10 and nameChange2 compare as like types.
gdf['GEOID10'] = gdf['GEOID10'].astype(str) scooterdf['nameChange2'] = scooterdf['nameChange2'].astype(str)Perform the merge
# Inner merge of census-block geometries onto scooter records on
# GEOID10 == nameChange2, dropping the now-redundant key/name columns.
# The remainder of the line is a run of separate exploration cells:
# commented-out CSV snapshots, an `ls`, per-CSA sums/means, missing-value
# counts built from isna()/notnull()/isnull() grouped by CSA, and a
# pandas-docs URL left behind by the export.
# NOTE(review): groupby('CSA')['value'].sum() skips NaN; the fillna(-1)
# variant later in the line changes the totals — confirm which total the
# downstream indicators should use.
scooterdfClean = gdf.merge(scooterdf, left_on='GEOID10', right_on='nameChange2').drop(['name', 'nameChange1', 'nameChange2'], axis=1)scooterdfClean.head()# scooterdf.to_csv('./scooterdf.csv', index=False) # gdf.drop(columns='geometry').to_csv('./boundsdf.csv', index=False)lsscooterdfClean.groupby('CSA')['value'].sum()scooterdfClean.value.isna().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='notApplicable')scooterdfClean.value.notnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NotMissingCount')scooterdfClean.value.isnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NumberMissingCount')scooterdfClean.fillna(-1).groupby('CSA')['value'].sum()scooterdfClean.groupby('CSA')['value'].mean()scooterdfClean.groupby('CSA')['value'].sum()scooterdfClean.CSA.value_counts()https://pandas.pydata.org/docs/getting_started/intro_tutorials/06_calculate_statistics.html| id | color | streetname | trip_count_sum | trip_count_average | trip_count_percent | geometry | |
|---|---|---|---|---|---|---|---|
| 0 | 150197772 | rgb(218, 231, 241) | Jefferson Street | 24 | 0.774194 | 0.0% | LINESTRING (-76.58576 39.29660, -76.58550 39.29661) |
| 1 | 150155955 | rgb(164, 190, 219) | 206 | 6.645161 | 0.3% | LINESTRING (-76.61320 39.28136, -76.61318 39.28115) | |
| 2 | 150191673 | rgb(204, 221, 236) | Harford Avenue | 49 | 1.580645 | 0.1% | LINESTRING (-76.60169 39.30535, -76.60163 39.30544, -76.60156 39.30555) |
| 3 | 150184657 | rgb(229, 239, 246) | 11 | 0.354839 | 0.0% | LINESTRING (-76.61493 39.29294, -76.61491 39.29258, -76.61490 39.29230) | |
| 4 | 150188407 | rgb(169, 194, 221) | 178 | 5.741935 | 0.2% | LINESTRING (-76.61662 39.29053, -76.61661 39.29046) |
Clean the gdf of empties.
# --- Notebook-export residue: cells and markdown headings fused per line;
# --- code tokens kept byte-identical.
# NOTE(review): ~gdf.isna() is a DataFrame-shaped mask — indexing with it
# masks individual cells to NaN instead of dropping rows; the intent is
# probably gdf.dropna(subset=['geometry']). ~gdf.is_empty is a row-wise
# Series and does filter out empty geometries. Confirm before relying on
# the first filter.
gdf = gdf[~gdf.isna()] gdf = gdf[~gdf.is_empty]Now get the extremities; this will take a minute.
# Unpack one coordinate 4-tuple per geometry.
# NOTE(review): assumes the project-level split() yields
# (lefty, leftx, righty, rightx) per geometry — confirm against its definition.
# hide_output gdf['lefty'], gdf['leftx'],gdf['righty'], gdf['rightx'] = zip(*gdf["geometry"].map(split))Split the gdf into a left and right dataset
# Duplicate the frame and keep only the columns each side needs; the right
# copy also sheds the descriptive/trip columns.
gdf_left = gdf.copy() gdf_left = gdf_left.drop(columns = ['geometry','rightx', 'righty']) gdf_right= gdf.copy() gdf_right = gdf_right.drop(columns = ['geometry','leftx', 'lefty', 'streetname', 'trip_count_sum', 'trip_count_average', 'trip_count_percent', 'color' ])The coordinate variables of object type will cause problems.
# Captured dtypes output: rightx is still object at this point.
gdf_right.dtypesid int64 righty float64 rightx object dtype: objectLet's go ahead and coerce a correction.
# Coerce all four coordinate columns to numeric; unparseable values become NaN.
gdf_right['righty']=pd.to_numeric(gdf_right['righty'], errors='coerce') gdf_left['lefty']=pd.to_numeric(gdf_left['lefty'], errors='coerce') gdf_right['rightx']=pd.to_numeric(gdf_right['rightx'], errors='coerce') gdf_left['leftx']=pd.to_numeric(gdf_left['leftx'], errors='coerce')Now we can view the results
# Captured dtypes output confirming rightx is now float64.
gdf_right.dtypesid int64 righty float64 rightx float64 dtype: objectSave these csv's because it took a minute to get to where we are now.
# Snapshot both halves. NOTE(review): no index=False here (unlike the earlier
# scooterdf snapshot), so pandas writes the index as an extra CSV column.
gdf_right.to_csv('rightRouts.csv') gdf_left.to_csv('leftRouts.csv')Convert the datasets to geodataframes for further exploration!
# Build point GeoDataFrames via the project helper readInGeometryData.
# NOTE(review): lat='leftx'/lng='lefty' — in the printed head(), rightx holds
# latitude-like values (39.x) and righty longitude-like (-76.x), so the x/y
# names are swapped relative to convention; usage looks internally consistent
# but verify. Also in_crs/out_crs=4268 is an unusual geographic CRS for
# lat/lng data — EPSG:4326 or 4269 expected; likely a typo, confirm.
# We could create a geodataframe like this #temp_gdf = gpd.GeoDataFrame( gdf_right, geometry=gpd.points_from_xy(gdf_right.rightx, gdf_right.righty) ) #temp_gdf.head() # Alternately this could work.. unfinished.. but wkt.loads can make a Point from text # gdf_right['strCol']=gdf_right['rightx'].astype(str) # gdf_right['geometry'] = gdf_right['strCol'].apply(wkt.loads)# hide_output left_csaMap = readInGeometryData(url=gdf_left, porg='p', geom= False, lat= 'leftx', lng= 'lefty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)# hide_output right_csaMap = readInGeometryData(url=gdf_right, porg='p', geom= False, lat= 'rightx', lng= 'righty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)right_csaMap.head()| id | righty | rightx | geometry | |
|---|---|---|---|---|
| 0 | 150197772 | -76.585503 | 39.296607 | POINT (-76.58550 39.29661) |
| 1 | 150155955 | -76.613183 | 39.281147 | POINT (-76.61318 39.28115) |
| 2 | 150191673 | -76.601555 | 39.305545 | POINT (-76.60156 39.30555) |
| 3 | 150184657 | -76.614899 | 39.292301 | POINT (-76.61490 39.29230) |
| 4 | 150188407 | -76.616608 | 39.290458 | POINT (-76.61661 39.29046) |
| pointsinpolygon | |
|---|---|
| 0 | 18 |
| 1 | 0 |
| 2 | 67 |
| 3 | 0 |
| 4 | 1536 |
| 5 | 18 |
| 6 | 32 |
| 7 | 8 |
| 8 | 1 |
| 9 | 65 |
| 10 | 0 |
| 11 | 0 |
| 12 | 13 |
| 13 | 1764 |
| 14 | 0 |
| 15 | 760 |
| 16 | 0 |
| 17 | 85 |
| 18 | 1253 |
| 19 | 2 |
| pointsinpolygon | |
|---|---|
| 0 | 18 |
| 1 | 0 |
| 2 | 67 |
| 3 | 0 |
| 4 | 1536 |
| 5 | 18 |
| 6 | 32 |
| 7 | 8 |
| 8 | 1 |
| 9 | 65 |
| 10 | 0 |
| 11 | 0 |
| 12 | 13 |
| 13 | 1764 |
| 14 | 0 |
| 15 | 760 |
| 16 | 0 |
| 17 | 85 |
| 18 | 1253 |
| 19 | 2 |
This chapter was brought to you through a collaboration between Brian Kelly, Michael Vandi, Logan Shertz, and Charles Karpati.
Dataset: Scooter data:
File paths will vary.
# cd ./drive/'My Drive'/BNIA/responsive_records/Routes# cd ../content/drive/My Drive/DATA/scooter/content/drive/My Drive/DATA/scooter # Michael's Directory # cd ./drive/'My Drive'/BNIA/'Scooter Use Data'/BNIAcd ./Routes/content/drive/My Drive/DATA/scooter/Routes # hide_output ! ls leftRouts.csv 'Routing November 2019.geojson' rightRouts.csv 'Routing October 2019.geojson' 'Routing August 2019.geojson' 'Routing September 2019.geojson' 'Routing December 2019.geojson' # hide_output !cd ../ && ls boundsdf.csv rightRouts.csv 'Trip origins-destinations by month' Deployment Routes leftRouts.csv scooterdf.csv| id | name | color | radius | value | geometry | |
|---|---|---|---|---|---|---|
| 0 | 1 | Block 245102502063018 | #fff | 4.999286 | NaN | POLYGON ((-76.66168 39.26342, -76.66136 39.26373, -76.66096 39.26343, -76.66041 39.26306, -76.66053 39.26296, -76.66070 39.26281, -76.66077 39.26284, -76.66100 39.26296, -76.66117 39.26306, -76.66129 39.26315, -76.66141 39.26322, -76.66168 39.26342)) |
| 1 | 2 | Block 245102006001022 | #fff | 4.999286 | NaN | POLYGON ((-76.66319 39.28427, -76.66306 39.28436, -76.66301 39.28439, -76.66293 39.28439, -76.66271 39.28418, -76.66303 39.28402, -76.66317 39.28417, -76.66320 39.28423, -76.66319 39.28427)) |
| 2 | 3 | Block 245102804033017 | #fff | 4.999286 | NaN | POLYGON ((-76.71122 39.27927, -76.71121 39.27975, -76.71121 39.27990, -76.71119 39.28076, -76.71045 39.27932, -76.71045 39.27920, -76.71056 39.27907, -76.71123 39.27880, -76.71122 39.27927)) |
| 3 | 4 | Block 245102716006003 | #fff | 4.999286 | NaN | POLYGON ((-76.66960 39.34512, -76.66905 39.34526, -76.66886 39.34531, -76.66848 39.34539, -76.66835 39.34502, -76.66867 39.34498, -76.66924 39.34484, -76.66946 39.34475, -76.66960 39.34512)) |
| 4 | 5 | Block 245102709023013 | #fff | 4.999286 | NaN | POLYGON ((-76.58976 39.35401, -76.58972 39.35415, -76.58966 39.35435, -76.58964 39.35447, -76.58960 39.35469, -76.58940 39.35467, -76.58925 39.35465, -76.58912 39.35465, -76.58902 39.35464, -76.58898 39.35464, -76.58906 39.35425, -76.58917 39.35385, -76.58925 39.35385, -76.58939 39.35386, -76.58979 39.35393, -76.58976 39.35401)) |
| CSA2010 | NumberMissingCount | |
|---|---|---|
| 0 | Allendale/Irvington/S. Hilton | 290 |
| 1 | Beechfield/Ten Hills/West Hills | 180 |
| 2 | Belair-Edison | 250 |
| 3 | Brooklyn/Curtis Bay/Hawkins Point | 580 |
| 4 | Canton | 0 |
Ensure the merge keys use consistent datatypes.
# --- Notebook-export residue: several cells and a markdown heading are fused
# --- onto each physical line below; code tokens are kept byte-identical.
# Cast both join keys to str so GEOID10 and nameChange2 compare as like types.
gdf['GEOID10'] = gdf['GEOID10'].astype(str) scooterdf['nameChange2'] = scooterdf['nameChange2'].astype(str)Perform the merge
# Inner merge of census-block geometries onto scooter records on
# GEOID10 == nameChange2, dropping the now-redundant key/name columns.
# The remainder of the line is a run of separate exploration cells:
# commented-out CSV snapshots, an `ls`, per-CSA sums/means, missing-value
# counts built from isna()/notnull()/isnull() grouped by CSA, and a
# pandas-docs URL left behind by the export.
# NOTE(review): groupby('CSA')['value'].sum() skips NaN; the fillna(-1)
# variant later in the line changes the totals — confirm which total the
# downstream indicators should use.
scooterdfClean = gdf.merge(scooterdf, left_on='GEOID10', right_on='nameChange2').drop(['name', 'nameChange1', 'nameChange2'], axis=1)scooterdfClean.head()# scooterdf.to_csv('./scooterdf.csv', index=False) # gdf.drop(columns='geometry').to_csv('./boundsdf.csv', index=False)lsscooterdfClean.groupby('CSA')['value'].sum()scooterdfClean.value.isna().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='notApplicable')scooterdfClean.value.notnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NotMissingCount')scooterdfClean.value.isnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NumberMissingCount')scooterdfClean.fillna(-1).groupby('CSA')['value'].sum()scooterdfClean.groupby('CSA')['value'].mean()scooterdfClean.groupby('CSA')['value'].sum()scooterdfClean.CSA.value_counts()https://pandas.pydata.org/docs/getting_started/intro_tutorials/06_calculate_statistics.html| id | color | streetname | trip_count_sum | trip_count_average | trip_count_percent | geometry | |
|---|---|---|---|---|---|---|---|
| 0 | 150197772 | rgb(218, 231, 241) | Jefferson Street | 24 | 0.774194 | 0.0% | LINESTRING (-76.58576 39.29660, -76.58550 39.29661) |
| 1 | 150155955 | rgb(164, 190, 219) | 206 | 6.645161 | 0.3% | LINESTRING (-76.61320 39.28136, -76.61318 39.28115) | |
| 2 | 150191673 | rgb(204, 221, 236) | Harford Avenue | 49 | 1.580645 | 0.1% | LINESTRING (-76.60169 39.30535, -76.60163 39.30544, -76.60156 39.30555) |
| 3 | 150184657 | rgb(229, 239, 246) | 11 | 0.354839 | 0.0% | LINESTRING (-76.61493 39.29294, -76.61491 39.29258, -76.61490 39.29230) | |
| 4 | 150188407 | rgb(169, 194, 221) | 178 | 5.741935 | 0.2% | LINESTRING (-76.61662 39.29053, -76.61661 39.29046) |
Clean the gdf of empties.
# --- Notebook-export residue: cells and markdown headings fused per line;
# --- code tokens kept byte-identical.
# NOTE(review): ~gdf.isna() is a DataFrame-shaped mask — indexing with it
# masks individual cells to NaN instead of dropping rows; the intent is
# probably gdf.dropna(subset=['geometry']). ~gdf.is_empty is a row-wise
# Series and does filter out empty geometries. Confirm before relying on
# the first filter.
gdf = gdf[~gdf.isna()] gdf = gdf[~gdf.is_empty]Now get the extremities; this will take a minute.
# Unpack one coordinate 4-tuple per geometry.
# NOTE(review): assumes the project-level split() yields
# (lefty, leftx, righty, rightx) per geometry — confirm against its definition.
# hide_output gdf['lefty'], gdf['leftx'],gdf['righty'], gdf['rightx'] = zip(*gdf["geometry"].map(split))Split the gdf into a left and right dataset
# Duplicate the frame and keep only the columns each side needs; the right
# copy also sheds the descriptive/trip columns.
gdf_left = gdf.copy() gdf_left = gdf_left.drop(columns = ['geometry','rightx', 'righty']) gdf_right= gdf.copy() gdf_right = gdf_right.drop(columns = ['geometry','leftx', 'lefty', 'streetname', 'trip_count_sum', 'trip_count_average', 'trip_count_percent', 'color' ])The coordinate variables of object type will cause problems.
# Captured dtypes output: rightx is still object at this point.
gdf_right.dtypesid int64 righty float64 rightx object dtype: objectLet's go ahead and coerce a correction.
# Coerce all four coordinate columns to numeric; unparseable values become NaN.
gdf_right['righty']=pd.to_numeric(gdf_right['righty'], errors='coerce') gdf_left['lefty']=pd.to_numeric(gdf_left['lefty'], errors='coerce') gdf_right['rightx']=pd.to_numeric(gdf_right['rightx'], errors='coerce') gdf_left['leftx']=pd.to_numeric(gdf_left['leftx'], errors='coerce')Now we can view the results
# Captured dtypes output confirming rightx is now float64.
gdf_right.dtypesid int64 righty float64 rightx float64 dtype: objectSave these csv's because it took a minute to get to where we are now.
# Snapshot both halves. NOTE(review): no index=False here (unlike the earlier
# scooterdf snapshot), so pandas writes the index as an extra CSV column.
gdf_right.to_csv('rightRouts.csv') gdf_left.to_csv('leftRouts.csv')Convert the datasets to geodataframes for further exploration!
# Build point GeoDataFrames via the project helper readInGeometryData.
# NOTE(review): lat='leftx'/lng='lefty' — in the printed head(), rightx holds
# latitude-like values (39.x) and righty longitude-like (-76.x), so the x/y
# names are swapped relative to convention; usage looks internally consistent
# but verify. Also in_crs/out_crs=4268 is an unusual geographic CRS for
# lat/lng data — EPSG:4326 or 4269 expected; likely a typo, confirm.
# We could create a geodataframe like this #temp_gdf = gpd.GeoDataFrame( gdf_right, geometry=gpd.points_from_xy(gdf_right.rightx, gdf_right.righty) ) #temp_gdf.head() # Alternately this could work.. unfinished.. but wkt.loads can make a Point from text # gdf_right['strCol']=gdf_right['rightx'].astype(str) # gdf_right['geometry'] = gdf_right['strCol'].apply(wkt.loads)# hide_output left_csaMap = readInGeometryData(url=gdf_left, porg='p', geom= False, lat= 'leftx', lng= 'lefty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)# hide_output right_csaMap = readInGeometryData(url=gdf_right, porg='p', geom= False, lat= 'rightx', lng= 'righty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)right_csaMap.head()| id | righty | rightx | geometry | |
|---|---|---|---|---|
| 0 | 150197772 | -76.585503 | 39.296607 | POINT (-76.58550 39.29661) |
| 1 | 150155955 | -76.613183 | 39.281147 | POINT (-76.61318 39.28115) |
| 2 | 150191673 | -76.601555 | 39.305545 | POINT (-76.60156 39.30555) |
| 3 | 150184657 | -76.614899 | 39.292301 | POINT (-76.61490 39.29230) |
| 4 | 150188407 | -76.616608 | 39.290458 | POINT (-76.61661 39.29046) |
| pointsinpolygon | |
|---|---|
| 0 | 18 |
| 1 | 0 |
| 2 | 67 |
| 3 | 0 |
| 4 | 1536 |
| 5 | 18 |
| 6 | 32 |
| 7 | 8 |
| 8 | 1 |
| 9 | 65 |
| 10 | 0 |
| 11 | 0 |
| 12 | 13 |
| 13 | 1764 |
| 14 | 0 |
| 15 | 760 |
| 16 | 0 |
| 17 | 85 |
| 18 | 1253 |
| 19 | 2 |
| pointsinpolygon | |
|---|---|
| 0 | 18 |
| 1 | 0 |
| 2 | 67 |
| 3 | 0 |
| 4 | 1536 |
| 5 | 18 |
| 6 | 32 |
| 7 | 8 |
| 8 | 1 |
| 9 | 65 |
| 10 | 0 |
| 11 | 0 |
| 12 | 13 |
| 13 | 1764 |
| 14 | 0 |
| 15 | 760 |
| 16 | 0 |
| 17 | 85 |
| 18 | 1253 |
| 19 | 2 |
This chapter was brought to you through a collaboration between Brian Kelly, Michael Vandi, Logan Shertz, and Charles Karpati.
Dataset: Scooter data:
File paths will vary.
# cd ./drive/'My Drive'/BNIA/responsive_records/Routes# cd ../content/drive/My Drive/DATA/scooter/content/drive/My Drive/DATA/scooter # Michael's Directory # cd ./drive/'My Drive'/BNIA/'Scooter Use Data'/BNIAcd ./Routes/content/drive/My Drive/DATA/scooter/Routes # hide_output ! ls leftRouts.csv 'Routing November 2019.geojson' rightRouts.csv 'Routing October 2019.geojson' 'Routing August 2019.geojson' 'Routing September 2019.geojson' 'Routing December 2019.geojson' # hide_output !cd ../ && ls boundsdf.csv rightRouts.csv 'Trip origins-destinations by month' Deployment Routes leftRouts.csv scooterdf.csv| id | name | color | radius | value | geometry | |
|---|---|---|---|---|---|---|
| 0 | 1 | Block 245102502063018 | #fff | 4.999286 | NaN | POLYGON ((-76.66168 39.26342, -76.66136 39.26373, -76.66096 39.26343, -76.66041 39.26306, -76.66053 39.26296, -76.66070 39.26281, -76.66077 39.26284, -76.66100 39.26296, -76.66117 39.26306, -76.66129 39.26315, -76.66141 39.26322, -76.66168 39.26342)) |
| 1 | 2 | Block 245102006001022 | #fff | 4.999286 | NaN | POLYGON ((-76.66319 39.28427, -76.66306 39.28436, -76.66301 39.28439, -76.66293 39.28439, -76.66271 39.28418, -76.66303 39.28402, -76.66317 39.28417, -76.66320 39.28423, -76.66319 39.28427)) |
| 2 | 3 | Block 245102804033017 | #fff | 4.999286 | NaN | POLYGON ((-76.71122 39.27927, -76.71121 39.27975, -76.71121 39.27990, -76.71119 39.28076, -76.71045 39.27932, -76.71045 39.27920, -76.71056 39.27907, -76.71123 39.27880, -76.71122 39.27927)) |
| 3 | 4 | Block 245102716006003 | #fff | 4.999286 | NaN | POLYGON ((-76.66960 39.34512, -76.66905 39.34526, -76.66886 39.34531, -76.66848 39.34539, -76.66835 39.34502, -76.66867 39.34498, -76.66924 39.34484, -76.66946 39.34475, -76.66960 39.34512)) |
| 4 | 5 | Block 245102709023013 | #fff | 4.999286 | NaN | POLYGON ((-76.58976 39.35401, -76.58972 39.35415, -76.58966 39.35435, -76.58964 39.35447, -76.58960 39.35469, -76.58940 39.35467, -76.58925 39.35465, -76.58912 39.35465, -76.58902 39.35464, -76.58898 39.35464, -76.58906 39.35425, -76.58917 39.35385, -76.58925 39.35385, -76.58939 39.35386, -76.58979 39.35393, -76.58976 39.35401)) |
| CSA2010 | NumberMissingCount | |
|---|---|---|
| 0 | Allendale/Irvington/S. Hilton | 290 |
| 1 | Beechfield/Ten Hills/West Hills | 180 |
| 2 | Belair-Edison | 250 |
| 3 | Brooklyn/Curtis Bay/Hawkins Point | 580 |
| 4 | Canton | 0 |
Ensure the merge keys have consistent datatypes before merging
gdf['GEOID10'] = gdf['GEOID10'].astype(str) scooterdf['nameChange2'] = scooterdf['nameChange2'].astype(str)Perform the merge
scooterdfClean = gdf.merge(scooterdf, left_on='GEOID10', right_on='nameChange2').drop(['name', 'nameChange1', 'nameChange2'], axis=1)scooterdfClean.head()# scooterdf.to_csv('./scooterdf.csv', index=False) # gdf.drop(columns='geometry').to_csv('./boundsdf.csv', index=False)lsscooterdfClean.groupby('CSA')['value'].sum()scooterdfClean.value.isna().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='notApplicable')scooterdfClean.value.notnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NotMissingCount')scooterdfClean.value.isnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NumberMissingCount')scooterdfClean.fillna(-1).groupby('CSA')['value'].sum()scooterdfClean.groupby('CSA')['value'].mean()scooterdfClean.groupby('CSA')['value'].sum()scooterdfClean.CSA.value_counts()https://pandas.pydata.org/docs/getting_started/intro_tutorials/06_calculate_statistics.html| id | color | streetname | trip_count_sum | trip_count_average | trip_count_percent | geometry | |
|---|---|---|---|---|---|---|---|
| 0 | 150197772 | rgb(218, 231, 241) | Jefferson Street | 24 | 0.774194 | 0.0% | LINESTRING (-76.58576 39.29660, -76.58550 39.29661) |
| 1 | 150155955 | rgb(164, 190, 219) | 206 | 6.645161 | 0.3% | LINESTRING (-76.61320 39.28136, -76.61318 39.28115) | |
| 2 | 150191673 | rgb(204, 221, 236) | Harford Avenue | 49 | 1.580645 | 0.1% | LINESTRING (-76.60169 39.30535, -76.60163 39.30544, -76.60156 39.30555) |
| 3 | 150184657 | rgb(229, 239, 246) | 11 | 0.354839 | 0.0% | LINESTRING (-76.61493 39.29294, -76.61491 39.29258, -76.61490 39.29230) | |
| 4 | 150188407 | rgb(169, 194, 221) | 178 | 5.741935 | 0.2% | LINESTRING (-76.61662 39.29053, -76.61661 39.29046) |
Clean the gdf of missing and empty geometries.
gdf = gdf[~gdf.isna()] gdf = gdf[~gdf.is_empty]Now get the extremities; this will take a minute.
# hide_output gdf['lefty'], gdf['leftx'],gdf['righty'], gdf['rightx'] = zip(*gdf["geometry"].map(split))Split the gdf into a left and right dataset
gdf_left = gdf.copy() gdf_left = gdf_left.drop(columns = ['geometry','rightx', 'righty']) gdf_right= gdf.copy() gdf_right = gdf_right.drop(columns = ['geometry','leftx', 'lefty', 'streetname', 'trip_count_sum', 'trip_count_average', 'trip_count_percent', 'color' ])The coordinate variables of object type will cause problems.
gdf_right.dtypesid int64 righty float64 rightx object dtype: objectLet's go ahead and coerce a correction.
gdf_right['righty']=pd.to_numeric(gdf_right['righty'], errors='coerce') gdf_left['lefty']=pd.to_numeric(gdf_left['lefty'], errors='coerce') gdf_right['rightx']=pd.to_numeric(gdf_right['rightx'], errors='coerce') gdf_left['leftx']=pd.to_numeric(gdf_left['leftx'], errors='coerce')Now we can view the results
gdf_right.dtypesid int64 righty float64 rightx float64 dtype: objectSave these csv's because it took a minute to get to where we are now.
gdf_right.to_csv('rightRouts.csv') gdf_left.to_csv('leftRouts.csv')Convert the datasets to geodataframes for further exploration!
# We could create a geodataframe like this #temp_gdf = gpd.GeoDataFrame( gdf_right, geometry=gpd.points_from_xy(gdf_right.rightx, gdf_right.righty) ) #temp_gdf.head() # Alternately this could work.. unfinished.. but wkt.loads can make a Point from text # gdf_right['strCol']=gdf_right['rightx'].astype(str) # gdf_right['geometry'] = gdf_right['strCol'].apply(wkt.loads)# hide_output left_csaMap = readInGeometryData(url=gdf_left, porg='p', geom= False, lat= 'leftx', lng= 'lefty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)# hide_output right_csaMap = readInGeometryData(url=gdf_right, porg='p', geom= False, lat= 'rightx', lng= 'righty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)right_csaMap.head()| id | righty | rightx | geometry | |
|---|---|---|---|---|
| 0 | 150197772 | -76.585503 | 39.296607 | POINT (-76.58550 39.29661) |
| 1 | 150155955 | -76.613183 | 39.281147 | POINT (-76.61318 39.28115) |
| 2 | 150191673 | -76.601555 | 39.305545 | POINT (-76.60156 39.30555) |
| 3 | 150184657 | -76.614899 | 39.292301 | POINT (-76.61490 39.29230) |
| 4 | 150188407 | -76.616608 | 39.290458 | POINT (-76.61661 39.29046) |
| pointsinpolygon | |
|---|---|
| 0 | 18 |
| 1 | 0 |
| 2 | 67 |
| 3 | 0 |
| 4 | 1536 |
| 5 | 18 |
| 6 | 32 |
| 7 | 8 |
| 8 | 1 |
| 9 | 65 |
| 10 | 0 |
| 11 | 0 |
| 12 | 13 |
| 13 | 1764 |
| 14 | 0 |
| 15 | 760 |
| 16 | 0 |
| 17 | 85 |
| 18 | 1253 |
| 19 | 2 |
| pointsinpolygon | |
|---|---|
| 0 | 18 |
| 1 | 0 |
| 2 | 67 |
| 3 | 0 |
| 4 | 1536 |
| 5 | 18 |
| 6 | 32 |
| 7 | 8 |
| 8 | 1 |
| 9 | 65 |
| 10 | 0 |
| 11 | 0 |
| 12 | 13 |
| 13 | 1764 |
| 14 | 0 |
| 15 | 760 |
| 16 | 0 |
| 17 | 85 |
| 18 | 1253 |
| 19 | 2 |
This chapter has been brought to you in collaboration by Brian Kelly, Michael Vandi, Logan Shertz, and Charles Karpati.
Dataset: Scooter data:
File paths will vary
# cd ./drive/'My Drive'/BNIA/responsive_records/Routes# cd ../content/drive/My Drive/DATA/scooter/content/drive/My Drive/DATA/scooter # Michael's Directory # cd ./drive/'My Drive'/BNIA/'Scooter Use Data'/BNIAcd ./Routes/content/drive/My Drive/DATA/scooter/Routes # hide_output ! ls leftRouts.csv 'Routing November 2019.geojson' rightRouts.csv 'Routing October 2019.geojson' 'Routing August 2019.geojson' 'Routing September 2019.geojson' 'Routing December 2019.geojson' # hide_output !cd ../ && ls boundsdf.csv rightRouts.csv 'Trip origins-destinations by month' Deployment Routes leftRouts.csv scooterdf.csv| id | name | color | radius | value | geometry | |
|---|---|---|---|---|---|---|
| 0 | 1 | Block 245102502063018 | #fff | 4.999286 | NaN | POLYGON ((-76.66168 39.26342, -76.66136 39.26373, -76.66096 39.26343, -76.66041 39.26306, -76.66053 39.26296, -76.66070 39.26281, -76.66077 39.26284, -76.66100 39.26296, -76.66117 39.26306, -76.66129 39.26315, -76.66141 39.26322, -76.66168 39.26342)) |
| 1 | 2 | Block 245102006001022 | #fff | 4.999286 | NaN | POLYGON ((-76.66319 39.28427, -76.66306 39.28436, -76.66301 39.28439, -76.66293 39.28439, -76.66271 39.28418, -76.66303 39.28402, -76.66317 39.28417, -76.66320 39.28423, -76.66319 39.28427)) |
| 2 | 3 | Block 245102804033017 | #fff | 4.999286 | NaN | POLYGON ((-76.71122 39.27927, -76.71121 39.27975, -76.71121 39.27990, -76.71119 39.28076, -76.71045 39.27932, -76.71045 39.27920, -76.71056 39.27907, -76.71123 39.27880, -76.71122 39.27927)) |
| 3 | 4 | Block 245102716006003 | #fff | 4.999286 | NaN | POLYGON ((-76.66960 39.34512, -76.66905 39.34526, -76.66886 39.34531, -76.66848 39.34539, -76.66835 39.34502, -76.66867 39.34498, -76.66924 39.34484, -76.66946 39.34475, -76.66960 39.34512)) |
| 4 | 5 | Block 245102709023013 | #fff | 4.999286 | NaN | POLYGON ((-76.58976 39.35401, -76.58972 39.35415, -76.58966 39.35435, -76.58964 39.35447, -76.58960 39.35469, -76.58940 39.35467, -76.58925 39.35465, -76.58912 39.35465, -76.58902 39.35464, -76.58898 39.35464, -76.58906 39.35425, -76.58917 39.35385, -76.58925 39.35385, -76.58939 39.35386, -76.58979 39.35393, -76.58976 39.35401)) |
| CSA2010 | NumberMissingCount | |
|---|---|---|
| 0 | Allendale/Irvington/S. Hilton | 290 |
| 1 | Beechfield/Ten Hills/West Hills | 180 |
| 2 | Belair-Edison | 250 |
| 3 | Brooklyn/Curtis Bay/Hawkins Point | 580 |
| 4 | Canton | 0 |
Ensure the merge keys have consistent datatypes before merging
gdf['GEOID10'] = gdf['GEOID10'].astype(str) scooterdf['nameChange2'] = scooterdf['nameChange2'].astype(str)Perform the merge
scooterdfClean = gdf.merge(scooterdf, left_on='GEOID10', right_on='nameChange2').drop(['name', 'nameChange1', 'nameChange2'], axis=1)scooterdfClean.head()# scooterdf.to_csv('./scooterdf.csv', index=False) # gdf.drop(columns='geometry').to_csv('./boundsdf.csv', index=False)lsscooterdfClean.groupby('CSA')['value'].sum()scooterdfClean.value.isna().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='notApplicable')scooterdfClean.value.notnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NotMissingCount')scooterdfClean.value.isnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NumberMissingCount')scooterdfClean.fillna(-1).groupby('CSA')['value'].sum()scooterdfClean.groupby('CSA')['value'].mean()scooterdfClean.groupby('CSA')['value'].sum()scooterdfClean.CSA.value_counts()https://pandas.pydata.org/docs/getting_started/intro_tutorials/06_calculate_statistics.html| id | color | streetname | trip_count_sum | trip_count_average | trip_count_percent | geometry | |
|---|---|---|---|---|---|---|---|
| 0 | 150197772 | rgb(218, 231, 241) | Jefferson Street | 24 | 0.774194 | 0.0% | LINESTRING (-76.58576 39.29660, -76.58550 39.29661) |
| 1 | 150155955 | rgb(164, 190, 219) | 206 | 6.645161 | 0.3% | LINESTRING (-76.61320 39.28136, -76.61318 39.28115) | |
| 2 | 150191673 | rgb(204, 221, 236) | Harford Avenue | 49 | 1.580645 | 0.1% | LINESTRING (-76.60169 39.30535, -76.60163 39.30544, -76.60156 39.30555) |
| 3 | 150184657 | rgb(229, 239, 246) | 11 | 0.354839 | 0.0% | LINESTRING (-76.61493 39.29294, -76.61491 39.29258, -76.61490 39.29230) | |
| 4 | 150188407 | rgb(169, 194, 221) | 178 | 5.741935 | 0.2% | LINESTRING (-76.61662 39.29053, -76.61661 39.29046) |
Clean the gdf of missing and empty geometries.
gdf = gdf[~gdf.isna()] gdf = gdf[~gdf.is_empty]Now get the extremities; this will take a minute.
# hide_output gdf['lefty'], gdf['leftx'],gdf['righty'], gdf['rightx'] = zip(*gdf["geometry"].map(split))Split the gdf into a left and right dataset
gdf_left = gdf.copy() gdf_left = gdf_left.drop(columns = ['geometry','rightx', 'righty']) gdf_right= gdf.copy() gdf_right = gdf_right.drop(columns = ['geometry','leftx', 'lefty', 'streetname', 'trip_count_sum', 'trip_count_average', 'trip_count_percent', 'color' ])The coordinate variables of object type will cause problems.
gdf_right.dtypesid int64 righty float64 rightx object dtype: objectLet's go ahead and coerce a correction.
gdf_right['righty']=pd.to_numeric(gdf_right['righty'], errors='coerce') gdf_left['lefty']=pd.to_numeric(gdf_left['lefty'], errors='coerce') gdf_right['rightx']=pd.to_numeric(gdf_right['rightx'], errors='coerce') gdf_left['leftx']=pd.to_numeric(gdf_left['leftx'], errors='coerce')Now we can view the results
gdf_right.dtypesid int64 righty float64 rightx float64 dtype: objectSave these csv's because it took a minute to get to where we are now.
gdf_right.to_csv('rightRouts.csv') gdf_left.to_csv('leftRouts.csv')Convert the datasets to geodataframes for further exploration!
# We could create a geodataframe like this #temp_gdf = gpd.GeoDataFrame( gdf_right, geometry=gpd.points_from_xy(gdf_right.rightx, gdf_right.righty) ) #temp_gdf.head() # Alternately this could work.. unfinished.. but wkt.loads can make a Point from text # gdf_right['strCol']=gdf_right['rightx'].astype(str) # gdf_right['geometry'] = gdf_right['strCol'].apply(wkt.loads)# hide_output left_csaMap = readInGeometryData(url=gdf_left, porg='p', geom= False, lat= 'leftx', lng= 'lefty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)# hide_output right_csaMap = readInGeometryData(url=gdf_right, porg='p', geom= False, lat= 'rightx', lng= 'righty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)right_csaMap.head()| id | righty | rightx | geometry | |
|---|---|---|---|---|
| 0 | 150197772 | -76.585503 | 39.296607 | POINT (-76.58550 39.29661) |
| 1 | 150155955 | -76.613183 | 39.281147 | POINT (-76.61318 39.28115) |
| 2 | 150191673 | -76.601555 | 39.305545 | POINT (-76.60156 39.30555) |
| 3 | 150184657 | -76.614899 | 39.292301 | POINT (-76.61490 39.29230) |
| 4 | 150188407 | -76.616608 | 39.290458 | POINT (-76.61661 39.29046) |
| pointsinpolygon | |
|---|---|
| 0 | 18 |
| 1 | 0 |
| 2 | 67 |
| 3 | 0 |
| 4 | 1536 |
| 5 | 18 |
| 6 | 32 |
| 7 | 8 |
| 8 | 1 |
| 9 | 65 |
| 10 | 0 |
| 11 | 0 |
| 12 | 13 |
| 13 | 1764 |
| 14 | 0 |
| 15 | 760 |
| 16 | 0 |
| 17 | 85 |
| 18 | 1253 |
| 19 | 2 |
| pointsinpolygon | |
|---|---|
| 0 | 18 |
| 1 | 0 |
| 2 | 67 |
| 3 | 0 |
| 4 | 1536 |
| 5 | 18 |
| 6 | 32 |
| 7 | 8 |
| 8 | 1 |
| 9 | 65 |
| 10 | 0 |
| 11 | 0 |
| 12 | 13 |
| 13 | 1764 |
| 14 | 0 |
| 15 | 760 |
| 16 | 0 |
| 17 | 85 |
| 18 | 1253 |
| 19 | 2 |